rm(list = ls())

library(data.table)
library(estimatr)
library(dplyr)
library(pryr)

add_backticks = function(x) {
  paste0("`", x, "`")
}

x_lm_formula = function(x) {
  paste(add_backticks(x), collapse = " + ")
}


load('./data/panel_month_dummies.RData')

# heterogeneous effects by vote share for far-right parties in national elections

month_cols <-  grep('month_2', colnames(panel), value=TRUE)

panel[,less_college:=(100-educ_diplBA)]
panel[,(paste('educ',month_cols,sep = '_')):= lapply(.SD, function(x) 
  x * panel[['less_college']] ), .SDcols = month_cols]

panel[,(paste('foreign',month_cols,sep = '_')):= lapply(.SD, function(x) 
  x * panel[['foreign_pop']] ), .SDcols = month_cols]

educ_cols <-  grep('educ_month', colnames(panel), value=TRUE)
educ_cols <- educ_cols[-157]
foreign_cols <- grep('foreign_month', colnames(panel), value=TRUE)
foreign_cols <- foreign_cols[-157]

panel[,time:= as.numeric(round((month-as.Date('2020-01-01',format='%Y-%m-%d'))/(365.25/12)))]
panel[,trend_econ:=time*losing_income_d]

province_cols <- grep('code_province_', colnames(panel), value=TRUE)
panel[,(paste('trend',province_cols,sep = '_')):= lapply(.SD, function(x) 
  x * panel[['time']] ), .SDcols = province_cols]
trend_cols <- grep('trend_code_province', colnames(panel), value=TRUE)
trend_cols <- trend_cols[-1]

flex_controls <- c(educ_cols, foreign_cols, trend_cols)

formula_flex_controls <- as.formula(paste("hc_pc_asians ~ covid2*losing_income_d*extr_right_d + trend_econ +", x_lm_formula(flex_controls)))

out_econ_het_vshare_con_trend_all <- lm_robust(formula = formula_flex_controls,
                                               data=panel, se_type='stata',
                                               clusters = panelvar, fixed_effects = ~ panelvar + month)

save(out_econ_het_vshare_con_trend_all, file = './output/out_econ_het_vshare_con_trend_all.RData')

# heterogeneous effects by far-right mayor
formula_flex_controls <- as.formula(paste("hc_pc_asians ~ covid2*losing_income_d*extr_right_mayor + trend_econ +", x_lm_formula(flex_controls)))

out_econ_het_con_trend_all <- lm_robust(formula = formula_flex_controls,
                                               data=panel, se_type='stata',
                                               clusters = panelvar, fixed_effects = ~ panelvar + month)

save(out_econ_het_con_trend_all, file = './output/out_econ_het_con_trend_all.RData')


# randomization inference to compute p-values on heterogeneous effects by far-right mayor
## runs for about 40 hrs in high performance computing
ri_het <- function(permutation_data, n_reps) {
  
  marginal_effect <- vector(mode='list', length=n_reps)
  z <- vector(mode='list', length=n_reps)
  effect_no_mayor <- vector(mode='list', length=n_reps)
  effect_mayor <- vector(mode='list', length=n_reps)
  
  for (r in seq(n_reps)) {
    print(paste("Rep", r))
    true_treatments <- permutation_data[, .(is_treated=unique(extr_right_mayor)==1), by=panelvar]
    
    permutation_treatments <- data.table(
      is_treated_permutation = sample(true_treatments$is_treated),
      permutation_id = unique(permutation_data$panelvar)
    )
    
    permutation_data$is_treated_permutation <- NULL
    permutation_data <- merge(permutation_data, permutation_treatments, by.x='panelvar', by.y='permutation_id')
    permutation_data[,extr_right_mayor_permutation:=ifelse(is_treated_permutation==TRUE,1,0)]
    
    
    formula_flex_controls <- as.formula(paste("hc_pc_asians ~ covid2*losing_income_d*extr_right_mayor_permutation + trend_econ +", x_lm_formula(flex_controls)))
    
    out_econ_het_con_trend_all <- lm_robust(formula = formula_flex_controls,
                                            data=permutation_data, se_type='stata',
                                            clusters = panelvar, fixed_effects = ~ panelvar + month)
    
    
    marginal_effect[[r]] <- tidy(out_econ_het_con_trend_all) %>% filter(term==c('covid2:losing_income_d:extr_right_mayor_permutation')) %>% select(estimate)
    z[[r]] <-
      ((
        tidy(out_econ_het_con_trend_all) %>% filter(term == c('covid2:losing_income_d')) %>% select(estimate) +
          tidy(out_econ_het_con_trend_all) %>% filter(
            term == c('covid2:losing_income_d:extr_right_mayor_permutation')
          ) %>% select(estimate)
      ) - (
        tidy(out_econ_het_con_trend_all) %>% filter(term == c('covid2:losing_income_d')) %>% select(estimate)
      )
      ) / (sqrt(((
        tidy(out_econ_het_con_trend_all) %>% filter(term == c('covid2:losing_income_d')) %>% select(std.error)
      ) ^ 2) + ((
        vcov(out_econ_het_con_trend_all)['covid2:losing_income_d', 'covid2:losing_income_d']
      ) +
        (
          vcov(out_econ_het_con_trend_all)['covid2:losing_income_d:extr_right_mayor_permutation', 'covid2:losing_income_d:extr_right_mayor_permutation']
        ) +
        (2 *
           (
             vcov(out_econ_het_con_trend_all)['covid2:losing_income_d', 'covid2:losing_income_d:extr_right_mayor_permutation']
           )))))
    effect_no_mayor[[r]] <- tidy(out_econ_het_con_trend_all) %>% filter(term==c('covid2:losing_income_d')) %>% select(estimate)
    effect_mayor[[r]] <- tidy(out_econ_het_con_trend_all) %>% filter(term==c('covid2:losing_income_d')) %>% select(estimate) +
      tidy(out_econ_het_con_trend_all) %>% filter(term==c('covid2:losing_income_d:extr_right_mayor_permutation')) %>% select(estimate)
    
    
    print(paste('Mem used', pryr::mem_used()/1e9, 'GB'))
    ll <- as.list(ls())
    names(ll) <- ll
    print(lapply(ll, function(x) pryr::object_size(get(x))))
    
    rm(out_econ_het_con_trend_all,
       formula_flex_controls, permutation_treatments, true_treatments)
    
    gc()
    
  }
  return(list(marginal_effect = marginal_effect,
              z = z,
              effect_no_mayor = effect_no_mayor,
              effect_mayor = effect_mayor))
}


panel <- panel[!is.na(extr_right_mayor)]

set.seed(879)
rr <- ri_het(panel, 300)

save(rr, file='./het_effects_ri.RData')